package com.galeno.sparksql

import org.apache.log4j.{Level, Logger}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.types.{DataTypes, StructField, StructType}
import org.apache.spark.sql.{DataFrame, Row, SparkSession}

/**
 * @Title: SparkSql06
 * @Description: Examples of converting different kinds of RDDs into DataFrames.
 * @author galeno
 * @date 2021/9/4 21:35
 */
/** Immutable record for one school grade: its level, student head count, and homeroom teacher. */
case class Grade(level:Int,stuCount:Int,teacher:String)
object SparkSql06 {

  /**
   * Demonstrates five ways to turn an RDD into a DataFrame:
   *   1. tuple RDD via `SparkSession.createDataFrame`
   *   2. tuple RDD via the implicit `toDF` conversion
   *   3. case-class RDD via `toDF` (column names inferred from fields)
   *   4. Java-bean / Scala-bean RDD via `createDataFrame(rdd, classOf[...])`
   *   5. `Row` RDD paired with an explicit `StructType` schema
   *
   * @param args unused command-line arguments
   */
  def main(args: Array[String]): Unit = {
    // Silence Spark's verbose INFO logging so the demo output stays readable.
    Logger.getLogger("org.apache").setLevel(Level.WARN)
    val spark = SparkSession.builder()
      .appName("hjehe")
      .master("local[*]")
      .getOrCreate()
    // Underlying SparkContext, needed to build raw RDDs.
    val sc = spark.sparkContext

    /**
     * 1. Convert a tuple RDD to a DataFrame.
     *    Columns default to _1, _2, ... when no names are supplied.
     */
    val rdd1: RDD[(String, Int)] = sc.makeRDD(Seq(("zhangsan", 15), ("lisi", 19), ("wangwu", 15), ("zhaoliu", 22)))
    val df1: DataFrame = spark.createDataFrame(rdd1)
    df1.printSchema()
    df1.show()

    /**
     * 2. Same conversion via the implicit helpers: call toDF directly on the RDD.
     */
    import spark.implicits._
    val df1tran: DataFrame = rdd1.toDF
    df1tran.show()
    df1tran.printSchema()

    // Default tuple column names (_1, _2) can be used in SQL-style expressions.
    df1tran.where("_2>10").select("_1","_2").show()


    println("*"*100)

    /**
     * 3. Convert a case-class RDD to a DataFrame; column names come from the fields.
     */
    val rdd2: RDD[Grade] = sc.makeRDD(Seq(
      Grade(1,50,"james"),
      Grade(2,55,"curry"),
      Grade(3,60,"zimuge"),
      Grade(4,40,"hador")
    ))

    val df2: DataFrame = rdd2.toDF
    df2.show()

    println("*"*100)

    /**
     * 4a. Convert a JavaBean RDD to a DataFrame.
     *     The bean class is passed explicitly so Spark can reflect on its getters.
     */
    println("============== df3 ================")
    val rdd3: RDD[JavaBean01] = sc.makeRDD(Seq(
      new JavaBean01(1,50,"张三"),
      new JavaBean01(2,55,"李赛"),
      new JavaBean01(3,60,"王五"),
      new JavaBean01(4,40,"赵六")
    ))
    val df3 = spark.createDataFrame(rdd3, classOf[JavaBean01])
    df3.printSchema()
    df3.show()


    /**
     * 4b. Convert a Scala bean RDD to a DataFrame.
     *     Note: the bean's properties must be annotated with @BeanProperty
     *     so Spark sees JavaBean-style getters.
     */
    // val, not var: the RDD reference is never reassigned.
    val rdd4 = sc.makeRDD(List(
      new ScalaBean01(1,2,"张三"),
      new ScalaBean01(1,2,"张三"),
      new ScalaBean01(1,2,"张三"),
      new ScalaBean01(1,2,"张三")
    ))
    val df4 = spark.createDataFrame(rdd4, classOf[ScalaBean01])
    df4.show()
    df4.printSchema()

    /**
     * 5. Convert a Row RDD to a DataFrame with an explicit schema:
     *    each StructField supplies a column name and type.
     */
    println("============== df7 ================")
    val rdd7 = sc.makeRDD(Seq(
      Row(1,"zs",18),
      Row(1,"zs",18),
      Row(1,"zs",18),
      Row(1,"zs",18),
      Row(1,"zs",18)
    ))
    val df7 = spark.createDataFrame(rdd7, StructType(Seq(
        StructField("id", DataTypes.IntegerType),
        StructField("name", DataTypes.StringType),
        StructField("age", DataTypes.IntegerType)
      )))
    df7.printSchema()
    df7.show()

    // Release the local Spark resources before exiting.
    spark.stop()
  }

}
